package com.saibo.spider.utils;

import java.util.ArrayList;
import java.util.List;

import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Helpers for pulling the JSON payloads out of the {@code <script>} elements of a Sina page.
 */
public class SinaBlogScriptUtil {
	/**
	 * Not referenced anywhere inside this class. Kept unchanged because it is public and
	 * code outside this file may read or write it.
	 */
	public static String a = "";

	/** Wrapper text that precedes the JSON payload in the rendered script element. */
	private static final String SCRIPT_PREFIX = "<script>FM.view(";

	/** Wrapper text that follows the JSON payload in the rendered script element. */
	private static final String SCRIPT_SUFFIX = ")</script>";

	/** Leading text an unwrapped payload must have to be kept as a JSON entry. */
	private static final String JSON_PREFIX = "{\"ns\"";

	/**
	 * Strips the {@code <script>FM.view( ... )</script>} wrapper from each script element
	 * and collects the remaining JSON strings.
	 *
	 * <p>Each element is rendered with {@code toString()}, the wrapper is removed from the
	 * start and end of that text, and the result is kept only when it begins with
	 * {@code {"ns"}. Every other script element is skipped.
	 *
	 * @param scriptElements the script elements to inspect; {@code null} is treated as empty
	 * @return a new mutable list of the extracted JSON strings, in encounter order; never
	 *         {@code null}
	 */
	public static List<String> replaceScript(Elements scriptElements) {
		List<String> jsonList = new ArrayList<String>();
		if (scriptElements == null) {
			// Nothing to scan; hand back an empty result instead of failing with an NPE.
			return jsonList;
		}
		for (Element element : scriptElements) {
			String payload = unwrap(element.toString());
			// After unwrapping, the text is either a JSON object or unrelated script markup.
			if (payload.startsWith(JSON_PREFIX)) {
				jsonList.add(payload);
			}
		}
		return jsonList;
	}

	/**
	 * Removes the leading {@code <script>FM.view(} and trailing {@code )</script>} markers
	 * when present.
	 *
	 * <p>Only the two ends are touched, so marker-like text inside the payload is preserved,
	 * and no regular expression has to be compiled per element.
	 *
	 * @param scriptHtml rendered script element text
	 * @return the text without the wrapper, or the input unchanged if no marker is present
	 */
	private static String unwrap(String scriptHtml) {
		String text = scriptHtml;
		if (text.startsWith(SCRIPT_PREFIX)) {
			text = text.substring(SCRIPT_PREFIX.length());
		}
		if (text.endsWith(SCRIPT_SUFFIX)) {
			text = text.substring(0, text.length() - SCRIPT_SUFFIX.length());
		}
		return text;
	}
}
